*********************
**** Sankey JSON ****
********************* 
* Working directory: the data sets Sankey0 and Sankey are read from and saved
* to this folder by the sections below.
* NOTE(review): absolute, machine-specific path; adjust it per workstation.
cd "/Users/ricardo/Documents/Dropbox (CIEP)/Sankey/"
* Output folder: section 3 writes one JSON file per country into $results.
global results "/Applications/XAMPP/xamppfiles/htdocs/Sankey"



***************************
*** 1. Import database ***
/***************************
import delimited download_june_8_2016.csv, clear

destring age*, replace ignore("NA" "ND")
replace varname = strtoname(upper(trim(varname)))
keep if varname != ""

replace country = proper(trim(country)) if country != "US"
replace vartype = proper(trim(vartype)) if vartype != "NTA"

reshape long age, i(country year varname variablename vartype nominalorreal) j(ages)
rename age profile
rename ages age

drop agegroups preparedat preparedby uploadedby countryattribute*
drop unit attribute upperagegroup singleorfiveyear status nominalorreal

save Sankey0, replace


*******************/
** 1.1 Population **
* Attach a population figure to every (country, age, year) cell; section 1.3
* multiplies the profiles by it.
use Sankey0, clear

* The figure is the total of the DNUN rows of the cell; all other rows add
* nothing. It is stored as double because the egen default (float) cannot
* represent integers above 16,777,216 exactly.
egen double pop = total(cond(varname == "DNUN", profile, .)), by(country age year)

* The DNUN and DN rows are not used as profiles afterwards.
drop if inlist(varname, "DNUN", "DN")


********************************
** 1.2 Vartype and duplicates **
* Reduce the data to one record per (country, age, varname):
*   - NTA and Rate records are discarded;
*   - CFE and CGE are kept only in their Mean version, every other variable
*     only in its non-Mean version;
*   - when several years exist, only the latest year is kept;
*   - when a year was uploaded more than once, only the latest upload is kept.
drop if inlist(vartype, "NTA", "Rate")
drop if vartype != "Mean" & inlist(varname, "CFE", "CGE")
drop if vartype == "Mean" & !inlist(varname, "CFE", "CGE")

* Latest year *
duplicates tag country age varname, g(tag)
* table stops with an error when no observation qualifies, so it is only
* called when there is something to report.
quietly count if tag != 0
if r(N) > 0 {
	table country varname if tag != 0
}
egen maxyear = max(year), by(country age varname)
drop if year != maxyear
drop maxyear

* Most recent update *
duplicates tag country year age varname, g(tag2)
quietly count if tag2 != 0
if r(N) > 0 {
	table country varname if tag2 != 0

	* uploadedat is split on slash and blank into up1 up2 up3 (date parts)
	* and up4 (time of day); the duplicates are narrowed one part at a time.
	* NOTE(review): the parts are compared in the order up1, up2, up3, which
	* selects the latest date only if the timestamp is written year first.
	* Confirm the format of uploadedat.
	split uploadedat, g(up) parse("/" " ")
	destring up*, replace
	foreach part in up1 up2 up3 {
		tempvar newest
		egen `newest' = max(`part'), by(country year age varname)
		drop if `part' != `newest' & tag2 != 0
		drop `newest'
	}

	* Same narrowing on the time of day, split on colon into time1 time2 time3.
	split up4, g(time) parse(":")
	destring time*, replace
	foreach part in time1 time2 time3 {
		tempvar newest
		egen `newest' = max(`part'), by(country year age varname)
		drop if `part' != `newest' & tag2 != 0
		drop `newest'
	}

	drop time*
}
drop tag* vartype up*


*******************
* 1.3 Adjustments *
* Scale every profile by the population of its cell.
replace profile = pop*profile

* For TFO and then TGO: when the country total of the variable is negative,
* the sign of every row of that variable is reversed. The helper total is
* missing on all other rows, so they are never touched.
foreach outflow in TFO TGO {
	tempvar total
	egen `total' = total(profile) if varname == "`outflow'", by(country)
	replace profile = -profile if `total' < 0
}


******************
** 1.4 Retained **
* Derive two extra variables per (country, age, year) cell and append them to
* the long data set as additional varname values:
*   ROW = TFI + TGI - TFO - TGO
*   RET = YL + YAG + YAF + SG + SF - TFI - TGI + ROW, with negatives set to 0
* In the RET formula negative YAG and YAF count as zero, and SG and SF enter
* as dis-saving only (positive values set to zero, negative values made
* positive).
preserve
drop variablename
capture drop __*
reshape wide profile, i(country age year pop) j(varname) string

foreach v in YAG YAF {
	replace profile`v' = 0 if profile`v' < 0
}
foreach v in SG SF {
	replace profile`v' = 0 if profile`v' > 0
	replace profile`v' = -profile`v'
}

* Generated as double: the default float storage would round these totals,
* which section 2.6 can no longer undo when it recasts profile to double.
g double profileROW = profileTFI + profileTGI - profileTFO - profileTGO
g double profileRET = profileYL + profileYAG + profileYAF + profileSG + profileSF - profileTFI - profileTGI + profileROW
replace profileRET = 0 if profileRET < 0

* Only the two derived variables go back to long form.
keep profileRET profileROW country year age pop
reshape long profile, i(country year age pop) j(varname) string

tempfile RET
save `RET'

restore
append using `RET'


************************
** 1.5 Final database **
* Remove leftover temporary variables before saving.
capture drop __*
save Sankey, replace



****************/
*** 2. Sankey ***
*****************
* Classify the rows of the final data set into the categories that become
* Sankey nodes.
use Sankey, clear


***************
** 2.1 Ages ***
* ages: 1 when age is below 25, 2 from 25 up to but not including 60,
* 3 otherwise.
g ages = cond(age < 25, 1, cond(age < 60, 2, 3))

* Labels *
label define ages 1 "Children" 2 "Adults" 3 "Elderly" 4 "ROW"
label values ages ages


*******************
** 2.2 Resources **
* resources: 1 = labor income (YL), 2 = asset income (YAG, YAF) when the
* profile is above zero, 3 = dis-saving (SG, SF) when the profile is below
* zero.

* Labor income *
g resources = 1 if varname == "YL"

* Asset income, positive part only *
replace resources = 2 if inlist(varname, "YAG", "YAF") & profile > 0

* Dis-saving, reported as a positive amount *
replace resources = 3 if inlist(varname, "SG", "SF") & profile < 0
replace profile = abs(profile) if resources == 3

* Labels *
label define resources 1 "Labor income" 2 "Asset income" 3 "Dis-saving"
label values resources resources


**************
** 2.3 Uses **
* uses: -3 = education (CFE, CGE), -2 = health (CFH, CGH), -1 = other
* consumption (CFX, CGX), 2 = asset income below zero, 3 = saving.

* Consumption *
*g uses = 1 if varname == "CG" | varname == "CF"
g uses = -3 if inlist(varname, "CFE", "CGE")
replace uses = -2 if inlist(varname, "CFH", "CGH")
replace uses = -1 if inlist(varname, "CFX", "CGX")

* Asset income, negative part only, reported as a positive amount *
replace uses = 2 if inlist(varname, "YAG", "YAF") & profile < 0
replace profile = abs(profile) if uses == 2

* Saving: every SG or SF row that was not classified as dis-saving *
replace uses = 3 if inlist(varname, "SG", "SF") & missing(resources)

* Labels *
label define uses 1 "Consumption" 2 "Asset Income" 3 "Saving" -3 "Education" -2 "Health" -1 "Other consumption"
label values uses uses


*******************
** 2.4 Transfers **
* transfers: 1 = TFI, 2 = TGI, 3 = TFO, 4 = TGO, 5 = RET; missing otherwise.
* The codes follow the position of each variable in the list below.
g transfers = .
local code = 0
foreach flow in TFI TGI TFO TGO RET {
	local ++code
	replace transfers = `code' if varname == "`flow'"
}

* Labels *
label define transfers 1 "Private" 2 "Public" 3 "Private" 4 "Public" 5 "Retained"
label values transfers transfers


****************
** 2.5 Sector **
* sector: 1 when the second or third character of varname is F, or varname
* is YL; 2 when the second or third character is G (this overrides 1).
* Rows without any resources, uses or transfers code get no sector.
g sector = 1 if strpos(substr(varname, 2, 2), "F") > 0 | varname == "YL"
replace sector = 2 if strpos(substr(varname, 2, 2), "G") > 0
replace sector = . if missing(resources) & missing(uses) & missing(transfers)

label define sector 1 "Private" 2 "Public"
label values sector sector


**************
** 2.6 Save **
* Store profile as double and keep the classified data in a temporary file
* that section 3 reads once per country.
recast double profile
format %20.0fc profile
tempfile database0
save `database0'


****************
/** 2.7 Review **
foreach var in YL YAG YAF SG SF CFE CGE CFH CGH CFX CGX TFI TGI TFO TGO RET ROW {
	di _newline in g "Variable: `var'"
	tabstat profile if varname == "`var'", stat(sum) f(%20.0fc) by(country) nototal
}
bysort country: tabstat profile if transfers != ., by(sector) f(%20.0fc) stat(sum) save
**/


*********************
*** 3. By Country ***
*********************
* For every country in the classified data set: build the node list and the
* link list of its Sankey diagram and write them as one JSON file.
* NOTE(review): the noisily prefixes suggest this file is meant to be run
* quietly, so that the text log opened below holds only the JSON text; confirm.
levelsof country, l(countries)
foreach country of local countries {


	*************************
	** Database by country **
	* Running node index: each new node takes the current value, then it is
	* incremented. The link entries refer to nodes by this index.
	local number = 0

	use `database0' if country == "`country'", clear
	noisily di "`country' `=year[1]'"

	* Database check *
	* The country is skipped unless every variable in the list has at least
	* one non-missing profile.
	local exit ""
	foreach N in YL YAG YAF SG SF CFE CGE CFH CGH CFX CGX TFI TGI TFO TGO RET {
		count if varname == "`N'" & profile != . //& profile != 0
		if r(N) == 0 {
			local exit "exit"
			continue, break
		}
	}
	if "`exit'" == "exit" {
		continue
	}

	* Log file *
	* From here on the country macro holds a name-safe version of the country,
	* cut to 10 characters. It is used as a suffix of local macro names and
	* as the JSON file name.
	* NOTE(review): the cut presumably keeps the composed macro names within
	* the name-length limit; confirm before changing it.
	local country = strtoname("`country'")
	local country = substr("`country'",1,10)
	tempfile sankey`country'
	capture quietly log using `sankey`country'', replace text
	* Opening brace of the JSON object; the OUTPUT section writes the rest.
	noisily di in w "{"

	* Per-country copy that every numbered flow section reloads.
	tempfile database
	save `database'



	*********
	*** 1 ***
	*********
	* Flow 1: resources to age groups.
	use `database', clear
	local from "resources"
	local to "ages"


	**************
	** Collapse **
	* One row per (resource, age group) pair holding the summed profile.
	capture collapse (sum) profile if `to' != . & `from' != ., by(`from' `to')
	if _rc != 0 {
		* Close the log of this country before skipping it; otherwise it stays
		* open and the next country cannot open its own log.
		capture quietly log close
		continue
	}
	gsort `to' `from'
	format profile %20.0fc


	*********************
	** Nodes and Flows **
	* A node is registered the first time its label is met: its JSON entry is
	* appended to the nodes macro and its index is remembered in a local macro
	* named node + label + country. Links refer to nodes by that index.
	forvalues k = 1/`=_N' {
		* Rows with a zero flow create neither nodes nor a link.
		if profile[`k'] == 0 {
			continue
		}

		* FROM node: a resource; its id is the name of the from variable.
		local srcname : label (`from') `=`from'[`k']'
		local srcname = subinstr("`srcname'"," ","_",.)
		local srckey = strtoname("`srcname'")
		if "`node`srckey'`country''" == "" {
			local nodes`country' `"`nodes`country''{"name":"`srcname'","id":"`from'"},"'
			local node`srckey'`country' = `number'
			local ++number
		}

		* TO node: an age group; its id is picked by the ages code (1, 2, 3).
		local dstname : label (`to') `=`to'[`k']'
		local dstname = subinstr("`dstname'"," ","_",.)
		local dstkey = strtoname("`dstname'")
		if "`node`dstkey'`country''" == "" {
			local id : word `=ages[`k']' of children adults elderly
			local nodes`country' `"`nodes`country''{"name":"`dstname'","id":"`id'"},"'
			local node`dstkey'`country' = `number'
			local ++number
		}

		* Link *
		local links`country' `"`links`country''{"target":`node`dstkey'`country'',"value":`=profile[`k']',"source":`node`srckey'`country''},"'
	}



	********/
	*** 2 ***
	*********
	* Flow 2: age groups to transfers. Transfer codes 1 and 2 are removed
	* first, so only codes 3, 4 and 5 remain as targets.
	use `database', clear
	local from "ages"
	local to "transfers"
	drop if inlist(transfers, 1, 2)


	**************
	** Collapse **
	* One row per (age group, transfer) pair holding the summed profile.
	capture collapse (sum) profile if `to' != . & `from' != ., by(`from' `to')
	if _rc != 0 {
		* Close the log of this country before skipping it; otherwise it stays
		* open and the next country cannot open its own log.
		capture quietly log close
		continue
	}
	sort `to' `from'
	format profile %20.0fc


	*********************
	** Nodes and Flows **
	* A node is registered the first time its label is met: its JSON entry is
	* appended to the nodes macro and its index is remembered in a local macro
	* named node + label + country. Links refer to nodes by that index.
	forvalues k = 1/`=_N' {
		* Rows with a zero flow create neither nodes nor a link.
		if profile[`k'] == 0 {
			continue
		}

		* FROM node: an age group; its id is picked by the ages code (1, 2, 3).
		local srcname : label (`from') `=`from'[`k']'
		local srcname = subinstr("`srcname'"," ","_",.)
		local srckey = strtoname("`srcname'")
		if "`node`srckey'`country''" == "" {
			local id : word `=ages[`k']' of children adults elderly
			local nodes`country' `"`nodes`country''{"name":"`srcname'","id":"`id'"},"'
			local node`srckey'`country' = `number'
			local ++number
		}

		* TO node: a transfer; written with an empty id.
		local dstname : label (`to') `=`to'[`k']'
		local dstname = subinstr("`dstname'"," ","_",.)
		local dstkey = strtoname("`dstname'")
		if "`node`dstkey'`country''" == "" {
			local nodes`country' `"`nodes`country''{"name":"`dstname'","id":""},"'
			local node`dstkey'`country' = `number'
			local ++number
		}

		* Link *
		local links`country' `"`links`country''{"target":`node`dstkey'`country'',"value":`=profile[`k']',"source":`node`srckey'`country''},"'
	}



	********/
	*** 3 ***
	*********
	* Flow 3: transfers (codes 1, 2 and 5) to age groups. The age groups get
	* labels ending in an underscore, so they become nodes distinct from the
	* age-group nodes of the earlier flows.
	use `database', clear
	local from "transfers"
	local to "ages"

	label define ages_ 1 "Children_" 2 "Adults_" 3 "Elderly_"
	label values ages ages_
	keep if inlist(transfers, 1, 2, 5)


	**************
	** Collapse **
	* One row per (transfer, age group) pair holding the summed profile.
	capture collapse (sum) profile if `to' != . & `from' != ., by(`from' `to')
	if _rc != 0 {
		* Close the log of this country before skipping it; otherwise it stays
		* open and the next country cannot open its own log.
		capture quietly log close
		continue
	}
	sort `to' `from'
	format profile %20.0fc


	*********************
	** Nodes and Flows **
	* A node is registered the first time its label is met: its JSON entry is
	* appended to the nodes macro and its index is remembered in a local macro
	* named node + label + country. Links refer to nodes by that index.
	forvalues k = 1/`=_N' {
		* Rows with a zero flow create neither nodes nor a link.
		if profile[`k'] == 0 {
			continue
		}

		* FROM node: a transfer; its id is the name of the from variable.
		local srcname : label (`from') `=`from'[`k']'
		local srcname = subinstr("`srcname'"," ","_",.)
		local srckey = strtoname("`srcname'")
		if "`node`srckey'`country''" == "" {
			local nodes`country' `"`nodes`country''{"name":"`srcname'","id":"`from'"},"'
			local node`srckey'`country' = `number'
			local ++number
		}

		* TO node: an age group; its id is picked by the ages code (1, 2, 3).
		local dstname : label (`to') `=`to'[`k']'
		local dstname = subinstr("`dstname'"," ","_",.)
		local dstkey = strtoname("`dstname'")
		if "`node`dstkey'`country''" == "" {
			local id : word `=ages[`k']' of children adults elderly
			local nodes`country' `"`nodes`country''{"name":"`dstname'","id":"`id'"},"'
			local node`dstkey'`country' = `number'
			local ++number
		}

		* Link *
		local links`country' `"`links`country''{"target":`node`dstkey'`country'',"value":`=profile[`k']',"source":`node`srckey'`country''},"'
	}



	********/
	*** 4 ***
	*********
	* Flow 4: age groups to uses. The age groups carry the labels ending in
	* an underscore.
	use `database', clear
	local from "ages"
	local to "uses"

	label define ages_ 1 "Children_" 2 "Adults_" 3 "Elderly_"
	label values ages ages_


	***************
	**  Collapse **
	* One row per (age group, use) pair holding the summed profile.
	capture collapse (sum) profile if `to' != . & `from' != ., by(`from' `to')
	if _rc != 0 {
		* Close the log of this country before skipping it; otherwise it stays
		* open and the next country cannot open its own log.
		capture quietly log close
		continue
	}
	sort `to' `from'
	format profile %20.0fc


	*********************
	** Nodes and Flows **
	* A node is registered the first time its label is met: its JSON entry is
	* appended to the nodes macro and its index is remembered in a local macro
	* named node + label + country. Links refer to nodes by that index.
	forvalues k = 1/`=_N' {
		* Rows with a zero flow create neither nodes nor a link.
		if profile[`k'] == 0 {
			continue
		}

		* FROM node: an age group; its id is picked by the ages code (1, 2, 3).
		local srcname : label (`from') `=`from'[`k']'
		local srcname = subinstr("`srcname'"," ","_",.)
		local srckey = strtoname("`srcname'")
		if "`node`srckey'`country''" == "" {
			local id : word `=ages[`k']' of children adults elderly
			local nodes`country' `"`nodes`country''{"name":"`srcname'","id":"`id'"},"'
			local node`srckey'`country' = `number'
			local ++number
		}

		* TO node: a use; empty id, with the name of the to variable as id2.
		local dstname : label (`to') `=`to'[`k']'
		local dstname = subinstr("`dstname'"," ","_",.)
		local dstkey = strtoname("`dstname'")
		if "`node`dstkey'`country''" == "" {
			local nodes`country' `"`nodes`country''{"name":"`dstname'","id":"","id2":"`to'"},"'
			local node`dstkey'`country' = `number'
			local ++number
		}

		* Link *
		local links`country' `"`links`country''{"target":`node`dstkey'`country'',"value":`=profile[`k']',"source":`node`srckey'`country''},"'
	}



	********/
	*** 5 ***
	*********
	* Flow 5: sector to ROW, for each sector whose net transfers are negative.
	use `database', clear
	local from "sector"
	local to "row"


	******************************
	** PRIVATE -- PUBLIC -- ROW **
	* Only transfer rows are used; TFO and TGO enter with reversed sign.
	drop if transfers == .
	replace profile = -profile if inlist(varname, "TFO", "TGO")

	* Sum of profile in total and per sector group (Stat1 is the first group,
	* Stat2 the second). All three are set to zero when tabstat fails.
	tempname ROW ROWPri ROWPub
	capture tabstat profile if transfers != ., by(sector) f(%20.0fc) stat(sum) save
	if _rc != 0 {
		matrix `ROW' = 0
		matrix `ROWPri' = 0
		matrix `ROWPub' = 0
	}
	else {
		tabstat profile if transfers != ., by(sector) f(%20.0fc) stat(sum) save
		matrix `ROW' = r(StatTotal)
		matrix `ROWPri' = r(Stat1)
		matrix `ROWPub' = r(Stat2)
	}

	* Rows of a sector with a negative sum point to ROW (row code 1) and have
	* their sign reversed.
	g row = 1 if (`ROWPri'[1,1] < 0 & sector == 1) | (`ROWPub'[1,1] < 0 & sector == 2)
	replace profile = -profile if row == 1
	
	label define row 1 "ROW" 2 "ROW_"
	label values row row
	

	**************
	** Collapse **
	* When no row qualifies the collapse fails and this flow is left out.
	capture collapse (sum) profile if `from' != . & `to' != ., by(`from' `to')
	if _rc == 0 {
		sort `to' `from'
		format profile %20.0fc


		*********************
		** Nodes and Flows **
		* A node is registered the first time its label is met; links refer
		* to nodes by the index stored at registration.
		forvalues k = 1/`=_N' {
			* Rows with a zero flow create neither nodes nor a link.
			if profile[`k'] == 0 {
				continue
			}

			* FROM node *
			local srcname : label (`from') `=`from'[`k']'
			local srcname = subinstr("`srcname'"," ","_",.)
			local srckey = strtoname("`srcname'")
			if "`node`srckey'`country''" == "" {
				local nodes`country' `"`nodes`country''{"name":"`srcname'","id":"`from'"},"'
				local node`srckey'`country' = `number'
				local ++number
			}

			* TO node *
			local dstname : label (`to') `=`to'[`k']'
			local dstname = subinstr("`dstname'"," ","_",.)
			local dstkey = strtoname("`dstname'")
			if "`node`dstkey'`country''" == "" {
				local nodes`country' `"`nodes`country''{"name":"`dstname'","id":"`to'"},"'
				local node`dstkey'`country' = `number'
				local ++number
			}

			* Link *
			local links`country' `"`links`country''{"target":`node`dstkey'`country'',"value":`=profile[`k']',"source":`node`srckey'`country''},"'
		}
	}

	

	********/
	*** 6 ***
	*********
	* Flow 6: ROW_ to sector, for each sector whose net transfers are positive.
	use `database', clear
	local from "row"
	local to "sector"


	******************************
	** PRIVATE -- PUBLIC -- ROW **
	* Only transfer rows are used; TFO and TGO enter with reversed sign.
	drop if transfers == .
	replace profile = -profile if inlist(varname, "TFO", "TGO")

	* Sum of profile in total and per sector group (Stat1 is the first group,
	* Stat2 the second). All three are set to zero when tabstat fails.
	tempname ROW ROWPri ROWPub
	capture tabstat profile if transfers != ., by(sector) f(%20.0fc) stat(sum) save
	if _rc != 0 {
		matrix `ROW' = 0
		matrix `ROWPri' = 0
		matrix `ROWPub' = 0
	}
	else {
		tabstat profile if transfers != ., by(sector) f(%20.0fc) stat(sum) save
		matrix `ROW' = r(StatTotal)
		matrix `ROWPri' = r(Stat1)
		matrix `ROWPub' = r(Stat2)
	}

	* Rows of a sector with a positive sum are fed from ROW_ (row code 2).
	g row = 2 if (`ROWPri'[1,1] > 0 & sector == 1) | (`ROWPub'[1,1] > 0 & sector == 2)
	
	label define row 1 "ROW" 2 "ROW_"
	label values row row


	**************
	** Collapse **
	* When no row qualifies the collapse fails and this flow is left out.
	capture collapse (sum) profile if `from' != . & `to' != ., by(`from' `to')
	if _rc == 0 {
		sort `to' `from'
		format profile %20.0fc


		*********************
		** Nodes and Flows **
		* A node is registered the first time its label is met; links refer
		* to nodes by the index stored at registration.
		forvalues k = 1/`=_N' {
			* Rows with a zero flow create neither nodes nor a link.
			if profile[`k'] == 0 {
				continue
			}

			* FROM node *
			local srcname : label (`from') `=`from'[`k']'
			local srcname = subinstr("`srcname'"," ","_",.)
			local srckey = strtoname("`srcname'")
			if "`node`srckey'`country''" == "" {
				local nodes`country' `"`nodes`country''{"name":"`srcname'","id":"`from'"},"'
				local node`srckey'`country' = `number'
				local ++number
			}

			* TO node *
			local dstname : label (`to') `=`to'[`k']'
			local dstname = subinstr("`dstname'"," ","_",.)
			local dstkey = strtoname("`dstname'")
			if "`node`dstkey'`country''" == "" {
				local nodes`country' `"`nodes`country''{"name":"`dstname'","id":"`to'"},"'
				local node`dstkey'`country' = `number'
				local ++number
			}

			* Link *
			local links`country' `"`links`country''{"target":`node`dstkey'`country'',"value":`=profile[`k']',"source":`node`srckey'`country''},"'
		}
	}



	*************/
	*** OUTPUT ***
	**************
	* Write the two JSON arrays into the log, close it, and clean the log text
	* into the final JSON file of the country.
	if "`exit'" == "" {
		* Each list ends with a comma, which the substr call removes.
		noisily di in w `""nodes": [ `=substr(`"`nodes`country''"',1,`=strlen(`"`nodes`country''"')'-1)'], "'
		noisily di in w `""links": [ `=substr(`"`links`country''"',1,`=strlen(`"`links`country''"')'-1)']}"'
		capture quietly log close

		* Clean-up passes over the log text, in this order:
		*   1. delete every newline that is followed by a > character
		*      (presumably the continuation marks of wrapped log lines);
		*   2. delete every blank;
		*   3. turn every underscore into a blank, which restores the blanks
		*      inside the labels that were replaced by underscores above;
		*   4. turn a missing value (a lone period before a comma) into 0.
		tempfile sankey1 sankey2 sankey3
		*filefilter `sankey`country'' `sankey1', from(\r\n>) to("") replace	// Windows
		filefilter `sankey`country'' `sankey1', from(\n>) to("") replace		// Mac
		filefilter `sankey1' `sankey2', from(" ") to("") replace
		filefilter `sankey2' `sankey3', from("_") to(" ") replace
		* The comma is part of the matched text and must be written back;
		* without it the value and the next key run together.
		filefilter `sankey3' "$results/`country'.json", from(".,") to("0,") replace
	}
}
